###############################################################################
## Simulation by calibrating from 1999 estimates -- II. Simulate for S times ##
## Model refers to "Distribution Regression with Selection"
## Authors: Chernozhukov, V., I. Fernandez-Val and S. Luo
## Last Update: 6/17/2018





rm(list=ls());


library(parallel);
library(sampleSelection);
library(maxLik);
library(mvtnorm);
library(abind);
library(MASS);



#####################
# Extract Arguments #
#####################
#Define default values for variables
n.boot         <- 200;
sample.id      <- 113109;
s.sim          <- 1;


#Get aruments from the command line
argv <- commandArgs(TRUE);

# Check if the command line is not empty and convert values to numerical values
if(length(argv) > 0){
  n.boot         <- as.numeric(argv[1]);
  sample.id      <- as.numeric(argv[2]);
  s.sim          <- as.numeric(argv[3]);
};


str.sample <- paste("_",sample.id,sep="");


################################################
# Source the file which contains the functions #
################################################
load(paste("Est_female",str.sample,".RData",sep=""));
source("DRS_Source.R");


####################
# Parallel Setting #
####################

# Calculate the namber of cores
n.cores <- as.numeric(Sys.getenv("NSLOTS"))
if(is.na(n.cores))n.cores<-1


##################################
## Step 2~3: Simulation Results ##
##################################
current.na.action <- options('na.action');
options(na.action='na.pass');
Xwc.all <- model.matrix(outcome.formula, mysample);
X.name  <- colnames(Xwc.all);
n.par <- length(X.name);

options(current.na.action);

n.obs <- NROW(mysample);



###############################
## Step 2: Construct dataset ##
###############################
set.seed(s.sim);
v <- rnorm(n.obs);
u <- rnorm(n.obs);


mysample$Dsim <- 1*(Zwc.all %*% pi.Heckman.f + v >= 0);
mysample$Ysim <- Xwc.all %*% beta.Heckman.f + rho.Heckman.f*sigma.Heckman.f*v + sqrt(1-rho.Heckman.f^2)*sigma.Heckman.f*u;

mysample$Ysim[mysample$Dsim==0] <- NA;


########################
## Step 3: Estimation ##
########################
outcome.formula.sim <- update(outcome.formula, Ysim ~.);
selection.formula.sim <- update(selection.formula, Dsim ~.);

elist.sim.f <- Estimation(mysample, ys, outcome.formula.sim, selection.formula.sim, rho.formula, initial.val=numeric(0));



##########################
## Step 4: Construct CB ##
##########################
rho.name <- paste("delta.",colnames(model.matrix(rho.formula, mysample)),sep="");



theta.f <- elist.sim.f$beta[,c(X.name,rho.name)]; # n.thres-by-(K+3) matrix

pi.f <- elist.sim.f$pi[1,];


phi.para.fl <- mclapply(as.list(ys), phi.theta, mc.cores=n.cores,
                        parameter.ys=theta.f, ys.ind=ys, pi=pi.f, data.local=mysample, 
                        outcome.formula=outcome.formula.sim, selection.formula=selection.formula.sim, 
                        rho.formula=rho.formula,
                        counter.id=counter.id, population=2); # list of n.thres, each N-by-(K+1) vector 


phi.para.f    <- do.call(abind, list(phi.para.fl,along=3));

var.f <- colSums(phi.para.f^2, na.rm=T)/NROW(phi.para.f)^2; # (K+1)*n.thres

rm(phi.para.fl,u,v);


library(pryr);
print(mem_used());
boot.f.l <- mclapply(1:n.boot, boot.func, mc.cores=n.cores, phi=phi.para.f, variance=var.f, 
                     var.boot=0);

rm(phi.para.f);




boot.f <- do.call(abind,list(boot.f.l,along=3)); # (n.thres+n.thres)*(n.par+n.delta)*B matrix -- (thres,para,b)

t.f <- boot.f[(n.thres+1):(2*n.thres), ,]; # n.thres*(n.par+n.delta)*B


sim.outcome <- abind(theta.f, t(var.f), t.f, along=3); # n.thres*(n.par+n.delta)*(2+B)

sim <- rbind(elist.sim.f$pi,
             cbind(sim.outcome,matrix(NA,ncol=length(Z.name)-length(X.name)-1,nrow=NROW(sim.outcome)))); # (2*n.thres+3)*(n.par+2);

###############################
# Save the simulation results #
###############################
saveRDS(sim, paste(getwd(),"/Simulations/Sim_f",str.sample,s.sim,".rds",sep=""));

